{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SPNet: Proposed Splits"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## COCO-Stuff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pickle\n",
    "import torch\n",
    "import torch.nn.functional as F\n",
    "import os.path as osp\n",
    "\n",
    "# Dataset root and the number of trailing training images used as validation images.\n",
    "datadir = 'data/datasets/cocostuff'\n",
    "val_size = 2000\n",
    "\n",
    "# Class names are read from column 1 of the tab-separated label file.\n",
    "all_labels = np.genfromtxt(datadir+'/labels_2.txt', delimiter='\\t', usecols=1, dtype='str')\n",
    "\n",
    "# Class index arrays of the split; train_classes is seen followed by seenval.\n",
    "seen_classes = np.load(datadir+'/split/seen_cls.npy')\n",
    "seenval_classes = np.load(datadir+'/split/seenval_cls.npy')\n",
    "train_classes = np.asarray(np.concatenate([seen_classes, seenval_classes]),dtype=int)\n",
    "novel_classes = np.load(datadir+'/split/novel_cls.npy')\n",
    "\n",
    "# Class embeddings: the fasttext and word2vec pickles concatenated along axis 1.\n",
    "# The files are opened in `with` blocks so the handles are closed right after reading.\n",
    "# NOTE: pickle.load can execute arbitrary code; only load pickle files from a trusted source.\n",
    "with open(datadir+'/word_vectors/fasttext.pkl', \"rb\") as fh:\n",
    "    fasttext_emb = pickle.load(fh)\n",
    "with open(datadir+'/word_vectors/word2vec.pkl', \"rb\") as fh:\n",
    "    word2vec_emb = pickle.load(fh)\n",
    "class_emb = np.concatenate([fasttext_emb, word2vec_emb], axis = 1)\n",
    "# L2-normalise along dim 1; append .cuda() here to place the tensor on the GPU.\n",
    "class_emb = F.normalize(torch.tensor(class_emb), p=2, dim=1)\n",
    "\n",
    "# Image lists; val_images is the last val_size entries of train_images.\n",
    "train_images = np.load(datadir+'/split/train_list.npy')\n",
    "val_images = train_images[-val_size:]\n",
    "test_images = np.load(datadir+'/split/test_list.npy')\n",
    "\n",
    "# Looked up by class index in the few-shot cell; presumably holds, per class,\n",
    "# the positions in train_images of images containing that class (TODO confirm).\n",
    "with open(datadir+'/split/inverse_dict_train.pkl', 'rb') as fh:\n",
    "    inverse_dict = pickle.load(fh)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total classes: 182\n",
      "Training classes: 155+12\n",
      "Test classes: 15\n",
      "Embedding dimension: 600\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Overview of the class split; train_classes already holds seen + seenval.\n",
    "n_novel = novel_classes.size\n",
    "total_classes = train_classes.size+n_novel\n",
    "print(\"Total classes: {}\".format(total_classes))\n",
    "n_seen, n_seenval = seen_classes.size, seenval_classes.size\n",
    "print(\"Training classes: {}+{}\".format(n_seen,n_seenval))\n",
    "print(\"Test classes: {}\".format(n_novel))\n",
    "# Width of one class embedding (second axis of class_emb).\n",
    "emb_dim = class_emb.shape[1]\n",
    "print(\"Embedding dimension: {}\\n\".format(emb_dim))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train embedding matrix dimension : (167, 600)\n",
      "# images for training: 118287\n",
      "# train/val images for finding threshold: 116287/2000\n",
      "Train class list : person, bicycle, car, airplane, bus, train, truck, boat, traffic light, fire hydrant, street sign, stop sign, parking meter, bench, bird, cat, dog, horse, sheep, bear, zebra, hat, backpack, umbrella, shoe, eye glasses, handbag, tie, skis, snowboard, sports ball, kite, baseball glove, surfboard, tennis racket, bottle, plate, cup, fork, knife, spoon, bowl, apple, sandwich, orange, broccoli, hot dog, pizza, donut, cake, couch, potted plant, bed, mirror, dining table, window, desk, toilet, door, tv, laptop, mouse, remote, cell phone, microwave, oven, toaster, sink, blender, book, clock, vase, teddy bear, hair drier, toothbrush, hair brush, banner, blanket, bridge, building-other, bush, cabinet, cage, carpet, ceiling-other, ceiling-tile, cloth, clothes, counter, cupboard, curtain, desk-stuff, dirt, door-stuff, fence, floor-marble, floor-other, floor-stone, floor-tile, floor-wood, flower, fog, food-other, fruit, furniture-other, gravel, ground-other, hill, house, leaves, mat, metal, mirror-stuff, moss, mountain, mud, net, paper, pillow, plant-other, plastic, platform, railing, railroad, rock, roof, rug, salad, sand, sea, shelf, sky-other, skyscraper, snow, solid-other, stairs, stone, straw, structural-other, table, tent, textile-other, towel, vegetable, wall-brick, wall-other, wall-panel, wall-stone, wall-tile, wall-wood, water-other, waterdrops, window-blind, window-other, wood, motorcycle, elephant, baseball bat, wine glass, banana, chair, keyboard, refrigerator, branch, light, napkin, pavement\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Training side: embedding rows of the train classes, image counts, class names.\n",
    "train_emb = class_emb[train_classes].numpy()\n",
    "print(\"Train embedding matrix dimension : {}\".format(train_emb.shape))\n",
    "print(\"# images for training: {}\".format(train_images.size))\n",
    "# Threshold search split: all but the last val_size images vs. the last val_size images.\n",
    "n_fit = train_images[:-val_size].size\n",
    "n_val = train_images[-val_size:].size\n",
    "print(\"# train/val images for finding threshold: {}/{}\".format(n_fit, n_val))\n",
    "train_names = all_labels[train_classes]\n",
    "print(\"Train class list : {}\\n\".format(\", \".join (train_names)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test embedding matrix dimension : (15, 600)\n",
      "# images for testing: 5000\n",
      "Test/novel class list : cow, giraffe, suitcase, frisbee, skateboard, carrot, scissors, cardboard, clouds, grass, playingfield, river, road, tree, wall-concrete\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Test side: embedding rows of the novel classes, image count, class names.\n",
    "novel_emb = class_emb[novel_classes].numpy()\n",
    "print(\"Test embedding matrix dimension : {}\".format(novel_emb.shape))\n",
    "print(\"# images for testing: {}\".format(test_images.size))\n",
    "novel_names = all_labels[novel_classes]\n",
    "print(\"Test/novel class list : {}\\n\".format(\", \".join (novel_names)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "For 2-shot Few-label training we use these images:\n",
      "cow            : 000000438154.jpg 000000368205.jpg\n",
      "giraffe        : 000000384230.jpg 000000073075.jpg\n",
      "suitcase       : 000000136433.jpg 000000406245.jpg\n",
      "frisbee        : 000000109503.jpg 000000489834.jpg\n",
      "skateboard     : 000000458677.jpg 000000476827.jpg\n",
      "carrot         : 000000298189.jpg 000000111549.jpg\n",
      "scissors       : 000000089738.jpg 000000078478.jpg\n",
      "cardboard      : 000000105564.jpg 000000107596.jpg\n",
      "clouds         : 000000010222.jpg 000000294228.jpg\n",
      "grass          : 000000378491.jpg 000000144904.jpg\n",
      "playingfield   : 000000109971.jpg 000000233042.jpg\n",
      "river          : 000000454610.jpg 000000318415.jpg\n",
      "road           : 000000144904.jpg 000000161738.jpg\n",
      "tree           : 000000378491.jpg 000000384230.jpg\n",
      "wall-concrete  : 000000481298.jpg 000000141228.jpg\n"
     ]
    }
   ],
   "source": [
    "# Number of example images listed per novel class.\n",
    "nshot = 2 # please change this number accordingly to get 1, 2, 5, 10, 20-shot training set\n",
    "# The header is built from nshot so it stays correct when the value above is changed.\n",
    "print(\"For {}-shot Few-label training we use these images:\".format(nshot))\n",
    "for key in novel_classes:\n",
    "    # Classes whose index array is empty are skipped without a message.\n",
    "    if(inverse_dict[key].size >0):\n",
    "        # The first nshot stored indices of this class select its few-shot images.\n",
    "        file_index = inverse_dict[key][0:nshot]\n",
    "        class_name = all_labels[key]\n",
    "        # Keep only the file name and swap its last three characters for 'jpg'.\n",
    "        example_file = \" \". join([osp.basename(train_images[fi])[:-3]+'jpg' for fi in file_index])\n",
    "        print(\"{0: <14} : {1}\".format(class_name, example_file))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## PASCAL-VOC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pickle\n",
    "import torch\n",
    "import torch.nn.functional as F\n",
    "# osp is used by the few-shot listing cell of this section; importing it here keeps\n",
    "# the PASCAL-VOC section runnable without first executing the COCO cells.\n",
    "import os.path as osp\n",
    "\n",
    "# Dataset root and the number of trailing training images used as validation images.\n",
    "datadir = 'data/datasets/voc12'\n",
    "val_size = 500\n",
    "\n",
    "# Class names are read from column 1 of the tab-separated label file.\n",
    "all_labels = np.genfromtxt(datadir+'/labels_2.txt', delimiter='\\t', usecols=1, dtype='str')\n",
    "\n",
    "# Class index arrays of the split; train_classes is seen followed by seenval.\n",
    "seen_classes = np.load(datadir+'/split/seen_cls.npy')\n",
    "seenval_classes = np.load(datadir+'/split/seenval_cls.npy')\n",
    "train_classes = np.asarray(np.concatenate([seen_classes, seenval_classes]),dtype=int)\n",
    "novel_classes = np.load(datadir+'/split/novel_cls.npy')\n",
    "\n",
    "# Class embeddings: the fasttext and word2vec pickles concatenated along axis 1.\n",
    "# The files are opened in `with` blocks so the handles are closed right after reading.\n",
    "# NOTE: pickle.load can execute arbitrary code; only load pickle files from a trusted source.\n",
    "with open(datadir+'/word_vectors/fasttext.pkl', \"rb\") as fh:\n",
    "    fasttext_emb = pickle.load(fh)\n",
    "with open(datadir+'/word_vectors/word2vec.pkl', \"rb\") as fh:\n",
    "    word2vec_emb = pickle.load(fh)\n",
    "class_emb = np.concatenate([fasttext_emb, word2vec_emb], axis = 1)\n",
    "# L2-normalise along dim 1; append .cuda() here to place the tensor on the GPU.\n",
    "class_emb = F.normalize(torch.tensor(class_emb), p=2, dim=1)\n",
    "\n",
    "# Image lists; val_images is the last val_size entries of train_images.\n",
    "train_images = np.load(datadir+'/split/train_list.npy')\n",
    "val_images = train_images[-val_size:]\n",
    "test_images = np.load(datadir+'/split/test_list.npy')\n",
    "\n",
    "# Looked up by class index in the few-shot cell; presumably holds, per class,\n",
    "# the positions in train_images of images containing that class (TODO confirm).\n",
    "with open(datadir+'/split/inverse_dict_train.pkl', 'rb') as fh:\n",
    "    inverse_dict = pickle.load(fh)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total classes: 20\n",
      "Training classes: 12+3\n",
      "Test classes: 5\n",
      "Embedding dimension: 600\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Overview of the class split and the width of the class embeddings.\n",
    "n_seen, n_seenval, n_novel = seen_classes.size, seenval_classes.size, novel_classes.size\n",
    "print(\"Total classes: {}\".format(n_seen+n_seenval+n_novel))\n",
    "print(\"Training classes: {}+{}\".format(n_seen,n_seenval))\n",
    "print(\"Test classes: {}\".format(n_novel))\n",
    "# Width of one class embedding (second axis of class_emb).\n",
    "emb_dim = class_emb.shape[1]\n",
    "print(\"Embedding dimension: {}\\n\".format(emb_dim))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train embedding matrix dimension : (15, 600)\n",
      "# images for training: 11685\n",
      "# train/val images for finding threshold: 11185/500\n",
      "Train class list : aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, diningtable, dog, horse, motorbike, person\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Training side: embedding rows of the train classes, image counts, class names.\n",
    "train_emb = class_emb[train_classes].numpy()\n",
    "print(\"Train embedding matrix dimension : {}\".format(train_emb.shape))\n",
    "print(\"# images for training: {}\".format(train_images.size))\n",
    "# Threshold search split: all but the last val_size images vs. the last val_size images.\n",
    "n_fit = train_images[:-val_size].size\n",
    "n_val = train_images[-val_size:].size\n",
    "print(\"# train/val images for finding threshold: {}/{}\".format(n_fit, n_val))\n",
    "train_names = all_labels[train_classes]\n",
    "print(\"Train class list : {}\\n\".format(\", \".join (train_names)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test embedding matrix dimension : (5, 600)\n",
      "# images for testing: 1449\n",
      "Test/novel class list : potted_plant, sheep, sofa, train, tv\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Test side: embedding rows of the novel classes, image count, class names.\n",
    "novel_emb = class_emb[novel_classes].numpy()\n",
    "print(\"Test embedding matrix dimension : {}\".format(novel_emb.shape))\n",
    "print(\"# images for testing: {}\".format(test_images.size))\n",
    "novel_names = all_labels[novel_classes]\n",
    "print(\"Test/novel class list : {}\\n\".format(\", \".join (novel_names)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "For 2-shot Few-label training we use these images:\n",
      "potted_plant   : 2008_007973.jpg 2010_003302.jpg\n",
      "sheep          : 2010_003806.jpg 2008_005635.jpg\n",
      "sofa           : 2011_002184.jpg 2009_001605.jpg\n",
      "train          : 2010_002930.jpg 2008_001105.jpg\n",
      "tv             : 2008_005254.jpg 2011_001357.jpg\n"
     ]
    }
   ],
   "source": [
    "# Number of example images listed per novel class.\n",
    "nshot = 2 # please change this number accordingly to get 1, 2, 5, 10, 20-shot training set\n",
    "# The header is built from nshot so it stays correct when the value above is changed.\n",
    "print(\"For {}-shot Few-label training we use these images:\".format(nshot))\n",
    "for key in novel_classes:\n",
    "    # Classes whose index array is empty are skipped without a message.\n",
    "    if(inverse_dict[key].size >0):\n",
    "        # The first nshot stored indices of this class select its few-shot images.\n",
    "        file_index = inverse_dict[key][0:nshot]\n",
    "        class_name = all_labels[key]\n",
    "        # Keep only the file name and swap its last three characters for 'jpg'.\n",
    "        example_file = \" \". join([osp.basename(train_images[fi])[:-3]+'jpg' for fi in file_index])\n",
    "        print(\"{0: <14} : {1}\".format(class_name, example_file))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
